patches, i.e. both dom0 and domU boots work as usual.
In PAE mode dom0 boot works and seems to be stable; it is
running Linux kernel builds with -j12 at the moment ;)
Actually using more than 4GB isn't tested yet, as my machine
has only 1GB. This also needs a patch for the e820 code in
Xen; right now Xen will not even try to use memory above 4GB.
Signed-off-by: Gerd Knorr <kraxel@suse.de>
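
For context on that e820 limitation: without PAE a 32-bit kernel can
only address 4GB of physical memory, so a memory-map walker has to
clamp RAM regions at that boundary. A rough sketch of such clamping,
with illustrative names (this is not Xen's actual e820 code):

    #include <stdint.h>

    #define E820_RAM 1

    struct e820entry {
            uint64_t addr;          /* start of memory segment */
            uint64_t size;          /* size of memory segment  */
            uint32_t type;          /* type of memory segment  */
    };

    /* Clamp a RAM region at 'limit' (4GB without PAE); returns the
     * usable size, or 0 if the region lies entirely above it. */
    static uint64_t clamp_region(const struct e820entry *e, uint64_t limit)
    {
            if (e->type != E820_RAM || e->addr >= limit)
                    return 0;
            if (e->addr + e->size > limit)
                    return limit - e->addr;
            return e->size;
    }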
Select this if you have a 32-bit processor and between 1 and 4
gigabytes of physical RAM.
-#config HIGHMEM64G
-# bool "64GB"
-# help
-# Select this if you have a 32-bit processor and more than 4
-# gigabytes of physical RAM.
+config HIGHMEM64G
+ bool "64GB"
+ help
+ Select this if you have a 32-bit processor and more than 4
+ gigabytes of physical RAM.
endchoice
.ascii "GUEST_OS=linux,GUEST_VER=2.6"
.ascii ",XEN_VER=3.0"
.ascii ",VIRT_BASE=0xC0000000"
+#ifdef CONFIG_X86_PAE
+ .ascii ",PAE=yes"
+#else
+ .ascii ",PAE=no"
+#endif
.ascii ",LOADER=generic"
.byte 0
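
The __xen_guest section is a NUL-terminated string of comma-separated
key=value pairs that the domain builder reads from the kernel image.
A minimal sketch of how a builder could test the new PAE flag
(guest_wants_pae() is a hypothetical helper, not the real domain
builder code):

    #include <string.h>

    /* Hypothetical helper: look for "PAE=yes" in the guest string. */
    static int guest_wants_pae(const char *guest_info)
    {
            const char *p = strstr(guest_info, "PAE=");
            return p != NULL && strncmp(p + 4, "yes", 3) == 0;
    }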
.quad 0x0000000000000000 /* 0x53 reserved */
.quad 0x0000000000000000 /* 0x5b reserved */
+#ifdef CONFIG_X86_PAE
+ .quad 0x00cfbb00000067ff /* 0x60 kernel 4GB code at 0x00000000 */
+ .quad 0x00cfb300000067ff /* 0x68 kernel 4GB data at 0x00000000 */
+ .quad 0x00cffb00000067ff /* 0x73 user 4GB code at 0x00000000 */
+ .quad 0x00cff300000067ff /* 0x7b user 4GB data at 0x00000000 */
+#else
.quad 0x00cfbb000000c3ff /* 0x60 kernel 4GB code at 0x00000000 */
.quad 0x00cfb3000000c3ff /* 0x68 kernel 4GB data at 0x00000000 */
.quad 0x00cffb000000c3ff /* 0x73 user 4GB code at 0x00000000 */
.quad 0x00cff3000000c3ff /* 0x7b user 4GB data at 0x00000000 */
+#endif
.quad 0x0000000000000000 /* 0x80 TSS descriptor */
.quad 0x0000000000000000 /* 0x88 LDT descriptor */
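
The only difference between the two sets of descriptors is the segment
limit: the non-PAE entries encode 0xfc3ff 4K pages (segments end at
0xFC400000), the PAE entries 0xf67ff pages (segments end at
0xF6800000), leaving a larger hole at the top of the address space for
the hypervisor. A small sketch that decodes the limits from the quads
above:

    #include <stdint.h>
    #include <stdio.h>

    /* Byte limit of a GDT descriptor; assumes the 4K-granularity
     * flag is set, as it is in all of the entries above. */
    static uint32_t desc_limit(uint64_t d)
    {
            uint32_t pages = (uint32_t)(d & 0xffff)
                           | (uint32_t)((d >> 32) & 0xf0000);
            return (pages << 12) | 0xfff;
    }

    int main(void)
    {
            printf("%#x\n", desc_limit(0x00cfbb000000c3ffULL) + 1); /* 0xfc400000 */
            printf("%#x\n", desc_limit(0x00cfbb00000067ffULL) + 1); /* 0xf6800000 */
            return 0;
    }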
#elif defined(CONFIG_X86_64)
#define pmd_val_ma(v) (v).pmd
#else
-#define pmd_val_ma(v) (v).pud.pgd.pgd
+#ifdef CONFIG_X86_PAE
+# define pmd_val_ma(v) ((v).pmd)
+# define pud_val_ma(v) ((v).pgd.pgd)
+#else
+# define pmd_val_ma(v) ((v).pud.pgd.pgd)
+#endif
#endif
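
The nesting in these accessors follows from the folded page-table
types (cf. asm-generic/pgtable-nopud.h and pgtable-nopmd.h); roughly:

    /* PAE: three levels, only the pud is folded into the pgd */
    typedef struct { unsigned long long pgd; } pgd_t;
    typedef struct { pgd_t pgd; } pud_t;              /* -> (v).pgd.pgd */
    typedef struct { unsigned long long pmd; } pmd_t; /* -> (v).pmd     */

    /* non-PAE: two levels, the pmd is folded as well:
     *   typedef struct { pud_t pud; } pmd_t;    -> (v).pud.pgd.pgd    */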
#ifndef CONFIG_XEN_SHADOW_MODE
-void xen_l1_entry_update(pte_t *ptr, unsigned long val)
+void xen_l1_entry_update(pte_t *ptr, pte_t val)
{
mmu_update_t u;
u.ptr = virt_to_machine(ptr);
- u.val = val;
+ u.val = pte_val_ma(val);
BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
}
+#ifdef CONFIG_X86_PAE
+void xen_l3_entry_update(pud_t *ptr, pud_t val)
+{
+ mmu_update_t u;
+ u.ptr = virt_to_machine(ptr);
+ u.val = pud_val_ma(val);
+ BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
+}
+#endif
+
#ifdef CONFIG_X86_64
void xen_l3_entry_update(pud_t *ptr, pud_t val)
{
struct mmuext_op op;
#ifdef CONFIG_X86_64
op.cmd = MMUEXT_PIN_L4_TABLE;
+#elif defined(CONFIG_X86_PAE)
+ op.cmd = MMUEXT_PIN_L3_TABLE;
#else
op.cmd = MMUEXT_PIN_L2_TABLE;
#endif
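
Only the command selection is visible in this hunk; the pin request
itself goes through the mmuext hypercall. As a sketch of the rest of
the function (the op.mfn field name and pfn_to_mfn() here are
assumptions based on the interface of that era):

    op.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);  /* assumed field name */
    BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);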
{
pud_t *pud;
pmd_t *pmd_table;
-
+
#ifdef CONFIG_X86_PAE
pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+ make_page_readonly(pmd_table);
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
pud = pud_offset(pgd, 0);
if (pmd_table != pmd_offset(pud, 0))
pmd_idx = pmd_index(vaddr);
pgd = pgd_base + pgd_idx;
- for ( ; (pgd_idx < PTRS_PER_PGD_NO_HV) && (vaddr != end); pgd++, pgd_idx++) {
+ for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
if (pgd_none(*pgd))
one_md_table_init(pgd);
pud = pud_offset(pgd, vaddr);
pmd = pmd_offset(pud, vaddr);
for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
- if (pmd_none(*pmd))
+ if (vaddr < HYPERVISOR_VIRT_START && pmd_none(*pmd))
one_page_table_init(pmd);
vaddr += PMD_SIZE;
pmd_idx = pmd_index(PAGE_OFFSET);
pte_ofs = pte_index(PAGE_OFFSET);
- for (; pgd_idx < PTRS_PER_PGD_NO_HV; pgd++, pgd_idx++) {
+ for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
+#ifdef CONFIG_XEN
+	/*
+	 * Native Linux doesn't have PAE paging enabled yet at
+	 * this point. When running as a Xen domain we are in
+	 * PAE mode already, so we can't simply hook in an empty
+	 * pmd; that would kill the mappings we are currently
+	 * using.
+	 */
+ pmd = pmd_offset(pud_offset(pgd, PAGE_OFFSET), PAGE_OFFSET);
+#else
pmd = one_md_table_init(pgd);
+#endif
if (pfn >= max_low_pfn)
continue;
pmd += pmd_idx;
for (; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
+ if (address >= HYPERVISOR_VIRT_START)
+ continue;
/* Map with big pages if possible, otherwise create normal page tables. */
if (cpu_has_pse) {
* page directory, write-protect the new page directory, then switch to
* it. We clean up by write-enabling and then freeing the old page dir.
*/
+#ifndef CONFIG_X86_PAE
memcpy(pgd_base, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
make_page_readonly(pgd_base);
xen_pgd_pin(__pa(pgd_base));
make_page_writable(old_pgd);
__flush_tlb_all();
free_bootmem(__pa(old_pgd), PAGE_SIZE);
- init_mm.context.pinned = 1;
+#else
+ {
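+	/*
+	 * A PAE pgd has only four entries; slot 3 covers the kernel
+	 * at PAGE_OFFSET. Copy that kernel pmd, hook the copy into
+	 * the new pgd, and unpin the old pgd only after cr3 points
+	 * at the new one.
+	 */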
+ pud_t *old_pud = pud_offset(old_pgd+3, PAGE_OFFSET);
+ pmd_t *old_pmd = pmd_offset(old_pud, PAGE_OFFSET);
+ pmd_t *new_pmd = alloc_bootmem_low_pages(PAGE_SIZE);
+
+ memcpy(new_pmd, old_pmd, PAGE_SIZE);
+ memcpy(pgd_base, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
+ set_pgd(&pgd_base[3], __pgd(__pa(new_pmd) | _PAGE_PRESENT));
+
+ make_page_readonly(new_pmd);
+ make_page_readonly(pgd_base);
+ xen_pgd_pin(__pa(pgd_base));
+ load_cr3(pgd_base);
+ xen_pgd_unpin(__pa(old_pgd));
+ make_page_writable(old_pgd);
+ make_page_writable(old_pmd);
+ __flush_tlb_all();
+
+ free_bootmem(__pa(old_pgd), PAGE_SIZE);
+ free_bootmem(__pa(old_pmd), PAGE_SIZE);
+ }
+#endif
+ init_mm.context.pinned = 1;
kernel_physical_mapping_init(pgd_base);
remap_numa_kva();
permanent_kmaps_init(pgd_base);
-#ifdef CONFIG_X86_PAE
+#if 0 /* def CONFIG_X86_PAE */
/*
* Add low memory identity-mappings - SMP needs it when
* starting up on an AP from real-mode. In the non-PAE
* All user-space mappings are explicitly cleared after
* SMP startup.
*/
- pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
+ set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]);
#endif
}
* us, because pgd_clear() is a no-op on i386.
*/
for (i = 0; i < USER_PTRS_PER_PGD; i++)
-#ifdef CONFIG_X86_PAE
+#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
#else
set_pgd(swapper_pg_dir+i, __pgd(0));
pagetable_init();
-#ifdef CONFIG_X86_PAE
+#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
/*
* We will bail out later - printk doesn't work right now so
* the user would just see a hanging kernel.
+ * When running as a Xen domain we are already in PAE mode
+ * at this point.
*/
if (cpu_has_pae)
set_in_cr4(X86_CR4_PAE);
panic("pgtable_cache_init(): cannot create pmd cache");
}
pgd_cache = kmem_cache_create("pgd",
+#if 0 /* XXX: how does _this_ work in native Linux? */
PTRS_PER_PGD*sizeof(pgd_t),
PTRS_PER_PGD*sizeof(pgd_t),
+#else
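+	/*
+	 * Assumed rationale: Xen pins and write-protects the pgd at
+	 * page granularity, so each pgd must occupy its own page; a
+	 * PAE pgd is only 32 bytes and would otherwise share one.
+	 */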
+ PAGE_SIZE,
+ PAGE_SIZE,
+#endif
0,
pgd_ctor,
pgd_dtor);
if (!HAVE_SHARED_KERNEL_PMD) {
pmd_t *pmd = (void *)__va(pgd_val(pgd[USER_PTRS_PER_PGD])-1);
make_page_writable(pmd);
+ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
kmem_cache_free(pmd_cache, pmd);
}
}
#include <linux/config.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <linux/kernel.h>
+#include <asm/bug.h>
#include <asm-xen/xen-public/xen.h>
#include <asm-xen/foreign_page.h>
typedef struct { unsigned long long pmd; } pmd_t;
typedef struct { unsigned long long pgd; } pgd_t;
typedef struct { unsigned long long pgprot; } pgprot_t;
-#define pmd_val(x) ((x).pmd)
-#define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32))
-#define __pmd(x) ((pmd_t) { (x) } )
+#define __pte(x) ({ unsigned long long _x = (x); \
+ (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); })
+#define __pgd(x) ({ unsigned long long _x = (x); \
+ (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); })
+#define __pmd(x) ({ unsigned long long _x = (x); \
+ (((_x)&1) ? ((pmd_t) {phys_to_machine(_x)}) : ((pmd_t) {(_x)})); })
+static inline unsigned long long pte_val(pte_t x)
+{
+ unsigned long long ret;
+
+ if (x.pte_low) {
+ ret = x.pte_low | (unsigned long long)x.pte_high << 32;
+ ret = machine_to_phys(ret) | 1;
+ } else {
+ ret = 0;
+ }
+ return ret;
+}
+static inline unsigned long long pmd_val(pmd_t x)
+{
+ unsigned long long ret = x.pmd;
+ if (ret) ret = machine_to_phys(ret) | 1;
+ return ret;
+}
+static inline unsigned long long pgd_val(pgd_t x)
+{
+ unsigned long long ret = x.pgd;
+ if (ret) ret = machine_to_phys(ret) | 1;
+ return ret;
+}
+static inline unsigned long long pte_val_ma(pte_t x)
+{
+ return (unsigned long long)x.pte_high << 32 | x.pte_low;
+}
#define HPAGE_SHIFT 21
#else
typedef struct { unsigned long pte_low; } pte_t;
#define pte_val(x) (((x).pte_low & 1) ? machine_to_phys((x).pte_low) : \
(x).pte_low)
#define pte_val_ma(x) ((x).pte_low)
+#define __pte(x) ({ unsigned long _x = (x); \
+ (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); })
+#define __pgd(x) ({ unsigned long _x = (x); \
+ (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); })
+static inline unsigned long pgd_val(pgd_t x)
+{
+ unsigned long ret = x.pgd;
+ if (ret) ret = machine_to_phys(ret) | 1;
+ return ret;
+}
#define HPAGE_SHIFT 22
#endif
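
All of these conversions go through the per-frame pseudo-physical <->
machine translation, with the low (present) bit deciding whether an
entry holds a machine address at all. Roughly, and assuming the usual
xenolinux frame-lookup tables:

    /* Sketch: translation is per page frame and preserves the
     * offset within the page; pfn_to_mfn() indexes the table the
     * guest keeps of its pseudo-physical -> machine mapping. */
    static inline unsigned long long phys_to_machine(unsigned long long phys)
    {
            unsigned long long mfn = pfn_to_mfn(phys >> PAGE_SHIFT);
            return (mfn << PAGE_SHIFT) | (phys & ~PAGE_MASK);
    }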
#define PTE_MASK PAGE_MASK
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#endif
-
-static inline unsigned long pgd_val(pgd_t x)
-{
- unsigned long ret = x.pgd;
- if (ret) ret = machine_to_phys(ret);
- return ret;
-}
#define pgprot_val(x) ((x).pgprot)
-#define __pte(x) ({ unsigned long _x = (x); \
- (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); })
#define __pte_ma(x) ((pte_t) { (x) } )
-#define __pgd(x) ({ unsigned long _x = (x); \
- (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); })
#define __pgprot(x) ((pgprot_t) { (x) } )
#endif /* !__ASSEMBLY__ */
if ( likely((__vma)->vm_mm == current->mm) ) { \
HYPERVISOR_update_va_mapping((__address), (__entry), UVMF_INVLPG|UVMF_MULTI|(unsigned long)((__vma)->vm_mm->cpu_vm_mask.bits)); \
} else { \
- xen_l1_entry_update((__ptep), (__entry).pte_low); \
+ xen_l1_entry_update((__ptep), (__entry)); \
flush_tlb_page((__vma), (__address)); \
} \
} \
HYPERVISOR_update_va_mapping((__address), \
__entry, 0); \
} else { \
- xen_l1_entry_update((__ptep), (__entry).pte_low); \
+ xen_l1_entry_update((__ptep), (__entry)); \
} \
} while (0)
#include <asm/ptrace.h>
#include <asm/page.h>
#if defined(__i386__)
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-#include <asm-generic/pgtable-nopmd.h>
-#endif
+# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+# ifdef CONFIG_X86_PAE
+# include <asm-generic/pgtable-nopud.h>
+# else
+# include <asm-generic/pgtable-nopmd.h>
+# endif
+# endif
#endif
/* arch/xen/i386/kernel/setup.c */
void xen_invlpg(unsigned long ptr);
#ifndef CONFIG_XEN_SHADOW_MODE
-void xen_l1_entry_update(pte_t *ptr, unsigned long val);
+void xen_l1_entry_update(pte_t *ptr, pte_t val);
void xen_l2_entry_update(pmd_t *ptr, pmd_t val);
-#ifdef __x86_64__
-void xen_l3_entry_update(pud_t *ptr, pud_t val); /* x86_64 only */
-#endif
+void xen_l3_entry_update(pud_t *ptr, pud_t val); /* x86_64/PAE */
void xen_l4_entry_update(pgd_t *ptr, pgd_t val); /* x86_64 only */
void xen_pgd_pin(unsigned long ptr);
void xen_pgd_unpin(unsigned long ptr);